Update
Usual Filtering
# --- Usual filtering of the DM-stack source catalog ---
# keep objects that are not PSF candidates and are not blended parents
df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
# ellipticity magnitude from the two HSM Regauss shear components
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0') # it was 1.5 before
#select only few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
'base_SdssShape_flux']
df = df[cols_select]
# drop all nans
df = df.dropna()
# additional columns
# NOTE(review): the ext_shapeHSM_HsmSourceMoments_* columns used below are
# NOT in cols_select, so after df = df[cols_select] this eval would raise a
# KeyError -- presumably 'radius' must be computed BEFORE the column
# selection; confirm against the actual notebook run order.
df['radius'] = df.eval(""" ( (ext_shapeHSM_HsmSourceMoments_xx * ext_shapeHSM_HsmSourceMoments_yy) \
- (ext_shapeHSM_HsmSourceMoments_xy**2 ) )**0.25 """)
Shape filtering
https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb
# Lensing-quality shape cuts: resolution, shear uncertainty, and flag.
# NOTE(review): these columns are also absent from cols_select above --
# these cuts likely belong before the column selection; verify.
df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')
Filter strongly lensed objects
# exclude strong lens objects <=154 distance
# The shape of lsst.fits file is 3398,3398 and center is 1699,1699.
# Exclude objects within 154 px of the image center (strong-lensing zone).
df['x_center'] = 1699
df['y_center'] = 1699
# NOTE(review): 'x[0]'/'x[1]' are imcat-style column names; at this stage
# the centroids are base_SdssCentroid_x/y -- confirm which applies here.
df['distance'] = ( (df['x[0]'] - df['x_center'])**2 + (df['x[1]'] - df['y_center'])**2 )**0.5
df = df[df.distance > 154]
Imcat script
# create new columns and cleaning (four files)
lc -C -n fN -n id -N '1 2 x' -N '1 2 errx' -N '1 2 g' -n ellip -n flux -n radius < "${M9T}".txt | lc +all 'mag = %flux log10 -2.5 *' | cleancat 15 | lc +all -r 'mag' > "${M9C}".cat
# merge 4 catalogs
mergecats 5 "${MC}".cat "${M9C}".cat "${LC}".cat "${L9C}".cat > ${catalogs}/merge.cat &&
lc -b +all
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector'
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector'
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector'
< ${catalogs}/merge.cat > ${final}/final_${i}.cat
Notes
final_text.txt is created by imcat program after merging four lsst files (m,m9,l,l9) after cleaning.
# Notebook setup: imports, display options, plotting style, wall-clock timer.
import json, os,sys
import numpy as np
import pandas as pd
import seaborn as sns
import time
sns.set(color_codes=True)
import plotly
import ipywidgets
pd.set_option('display.max_columns',200)
time_start_notebook = time.time()
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# Jupyter line magic: render matplotlib figures inline.
%matplotlib inline
print([(x.__name__, x.__version__) for x in [np,pd,sns,plotly,ipywidgets]])
# Jupyter cell magic: stop the notebook from auto-scrolling long outputs.
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;
def show_method_attributes(obj, ncols=7, start=None, inside=None):
    """Show the public attributes of an object arranged in a wide table.

    Parameters
    ----------
    obj : object
        Any object (class, module, instance) whose attributes to list.
    ncols : int
        Number of columns in the returned table.
    start : str, tuple or list, optional
        Keep only attributes starting with this prefix (or any of these).
    inside : str, tuple or list, optional
        Keep only attributes containing this substring (or any of these).

    Returns
    -------
    pd.DataFrame
        Attribute names arranged in `ncols` columns, padded with ''.

    Example:
    ========
    show_method_attributes(list)
    """
    print(f'Object Type: {type(obj)}\n')
    # public attributes only (skip dunder / private names)
    lst = [elem for elem in dir(obj) if elem[0] != '_']
    # drop common module aliases that clutter the listing
    lst = [elem for elem in lst
           if elem not in 'os np pd sys time psycopg2'.split()]
    # normalize a single string to a tuple so both cases share one path
    if isinstance(start, str):
        start = (start,)
    if isinstance(start, (tuple, list)):
        # any() ensures an attribute matching several prefixes appears once
        # (the old nested comprehension emitted one copy per matching prefix)
        lst = [elem for elem in lst
               if any(elem.startswith(s) for s in start)]
    if isinstance(inside, str):
        inside = (inside,)
    if isinstance(inside, (tuple, list)):
        lst = [elem for elem in lst
               if any(sub in elem for sub in inside)]
    return pd.DataFrame(np.array_split(lst, ncols)).T.fillna('')
g_sq = g00 g00 + g10 g10
gmd_sq = gmd0**2 + gmd1**2
# Peek at the merged imcat catalog, then load it with explicit column names.
!head -2 ../data/cleancat/final_text_cleancat15_000_199_ngals10k.txt
# Space-separated header as produced by imcat: fN=file number, id=object id,
# x=position, errx=position error, g=shear, then ellip/flux/radius/mag and
# the merged gm/gc/gmd/gcd combinations.
names = "fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1]"
print(names)
# Same names as a python list (this rebinds `names`, used by read_csv below).
names = ['fN[0][0]','fN[1][0]','fN[2][0]','fN[3][0]',
'id[0][0]','id[1][0]','id[2][0]','id[3][0]',
'x[0]','x[1]',
'errx[0][0]','errx[0][1]','errx[1][0]','errx[1][1]','errx[2][0]',
'errx[2][1]','errx[3][0]','errx[3][1]',
'g[0][0]','g[0][1]','g[1][0]','g[1][1]','g[2][0]','g[2][1]','g[3][0]','g[3][1]',
'ellip[0][0]','ellip[1][0]','ellip[2][0]','ellip[3][0]',
'flux[0][0]','flux[1][0]','flux[2][0]','flux[3][0]',
'radius[0][0]','radius[1][0]','radius[2][0]','radius[3][0]',
'mag[0][0]','mag[1][0]','mag[2][0]','mag[3][0]',
'gm[0]','gm[1]','gc[0]', 'gc[1]',
'gmd[0]','gmd[1]','gcd[0]','gcd[1]']
file_path = '../data/cleancat/final_text_cleancat15_000_199_ngals10k.txt'
# columns are separated by runs of two or more spaces, hence the python engine
df = pd.read_csv(file_path,comment='#',engine='python',sep=r'\s\s+',
header=None,names=names)
print(df.shape)
# new columns
# df['g_sq'] = df['g[0][0]'] **2 + df['g[1][0]']**2 # only for imcat 00 and 10
# df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2
# squared shear magnitudes and mean magnitudes for mono / chro catalogs
df['g_sq'] = df['g[0][0]'] **2 + df['g[0][1]']**2
df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2
df['gm_sq'] = df['gm[0]']**2 + df['gm[1]']**2
df['gc_sq'] = df['gc[0]']**2 + df['gc[1]']**2
df['mag_mono'] = (df['mag[0][0]'] + df['mag[1][0]'] ) / 2
df['mag_chro'] = (df['mag[2][0]'] + df['mag[3][0]'] ) / 2
df.head()
# Side-by-side histograms of squared shear for mono vs chro catalogs.
fig,ax = plt.subplots(1,2,figsize=(14,6))
df['gm_sq'].plot.hist(bins=50,ax=ax[0],color='b',label='mono')
ax[0].set_xlabel('gm_sq')
ax[0].legend()
ax[0].set_title('g-squared histogram for mono')
# gcsq
df['gc_sq'].plot.hist(bins=50,ax=ax[1],color='r',label='chro')
ax[1].set_xlabel('gc_sq')
ax[1].legend()
ax[1].set_title('g-squared histogram for chro')
# savefig
plt.savefig('images/gmsq_gcsq_hist.png')
# Overlaid density plots of g_sq and gmd_sq.
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11
# (use histplot/displot when upgrading).
plt.figure(figsize=(12,8))
sns.distplot(df['g_sq'],label='g_sq')
sns.distplot(df['gmd_sq'],label='gmd_sq')
plt.legend()
# Plotly offline setup for the interactive contour plots below.
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
def matrix_of_number_density_from_two_cols(df, xcol, ycol, N):
    """Create an N x N grid of number densities for two columns.

    - Find the absolute max over the min/max of the two columns.
    - Create N equal bins from 0 to that absolute max (shared by both axes).
    - Count points per (x-bin, y-bin) cell; points outside the bins
      (e.g. values <= 0 or NaN) are excluded.
    - Return an (N, N) integer matrix, transposed so rows follow ycol.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing `xcol` and `ycol`.
    xcol, ycol : str
        Column names to bin.
    N : int
        Number of bins per axis.

    Returns
    -------
    np.ndarray of shape (N, N)
    """
    from itertools import product
    # derived label names for the helper columns
    xlabel1 = xcol + '_cat_labels'
    ylabel1 = ycol + '_cat_labels'
    colname = 'cat_freq'
    # take only the two columns of interest
    dx = df[[xcol, ycol]].copy()
    # absolute maximum over the min/max rows of describe() for both columns
    max_abs_xcol_ycol = df[[xcol, ycol]].describe().iloc[[3, -1], :].abs().max().max()
    # 1d array with N+1 edges to create N bins
    bins = np.linspace(0, max_abs_xcol_ycol, N + 1)
    # bin index per point; .cat.codes yields plain ints (-1 == outside bins),
    # which keeps the merge below on compatible integer dtypes
    # (merging categorical labels against int columns raises in modern pandas)
    dx[xlabel1] = pd.cut(dx[xcol], bins, labels=np.arange(N)).cat.codes
    dx[ylabel1] = pd.cut(dx[ycol], bins, labels=np.arange(N)).cat.codes
    # count the number of points in each occupied cell
    dx[colname] = dx.groupby([xlabel1, ylabel1])[xlabel1].transform('count')
    # one row per occupied cell
    dx1 = dx.drop_duplicates(subset=[xlabel1, ylabel1])[[xlabel1, ylabel1, colname]]
    # full N*N grid of cells, default count 0
    perms = list(product(range(N), range(N)))
    dx2 = pd.DataFrame({xlabel1: [p[0] for p in perms],
                        ylabel1: [p[1] for p in perms],
                        colname: 0})
    # pull the observed counts into the full grid
    # (axis=1 keyword: positional axis was removed in pandas 2.0)
    dx2.update(dx2.drop(colname, axis=1).merge(dx1, how='left'))
    dx2 = dx2.astype(int)
    # z values to plot as a heatmap; transpose so rows correspond to ycol
    z = dx2[colname].values.reshape(N, N)
    return z.T
def transform_scale(z, transform='linear', scale=None):
    """Apply an elementwise transform, then an optional scaling, to an array.

    transform: linear, log, sqrt, sinh, arcsinh
    scale    : minmax, zscale
    """
    # 'linear' is the identity; it exists so results can be compared
    # against the transformed versions. Unknown names also pass through.
    transforms = {
        'log': np.log1p,
        'sqrt': np.sqrt,
        'sinh': np.sinh,
        'arcsinh': np.arcsinh,
    }
    func = transforms.get(transform)
    if func is not None:
        z = func(z)
    # scaling: divide by the range, or standardize to zero mean / unit std
    if scale == 'minmax':
        z = z / (z.max() - z.min())
    elif scale == 'zscale':
        z = (z - z.mean()) / z.std()
    return z
def plot_contour(Z,colorscale,x1y1x2y2=None,
                 title='Contour plot',
                 xlabel='',
                 ylabel=''):
    """Plot the contour plot.
    Contour plot from given 2d array.

    Parameters: Z (2d array), colorscale (plotly colorscale name),
    x1y1x2y2 (optional [x1, y1, x2, y2] bin numbers for an overlay line),
    title / xlabel / ylabel (figure text). Renders via plotly offline iplot.

    Example:
    -----------
    z = matrix_of_number_density_from_two_cols(df,'gsq','gmdsq',N)
    z1 = transform_scale(z,transform=transform,scale=scale)
    plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
                 title=f'Contour plot: {transform}+{scale}',
                 xlabel='Bin number of gsq',
                 ylabel='Bin number of gmsq')
    """
    trace1 = go.Contour(z=Z, colorscale=colorscale)
    axis_layout = dict(
        showticklabels = True
    )
    xaxis = {**axis_layout, **{'title':f'{xlabel}'}}
    yaxis = {**axis_layout, **{'title':f'{ylabel}'}}
    layout = go.Layout(
        width=1000,
        height=1000,
        autosize=False,
        title=f'{title} ',
        xaxis = xaxis,
        yaxis = yaxis,
    )
    data = [trace1]
    if x1y1x2y2:
        # center line
        # NOTE(review): the diagonal endpoints are hard-coded for a
        # 100x100 grid (0..99) -- confirm if N ever differs from 100.
        p2x1, p2y1 = 0,0
        p2x2, p2y2 = 99,99
        sc1 = go.Scatter(x=[p2x1,p2x2],
                         y=[p2y1,p2y2],
                         mode = 'lines+markers',
                         name = f'line ({p2x1},{p2y1}) to ({p2x2},{p2y2})')
        sc2 = go.Scatter(x=[x1y1x2y2[0],x1y1x2y2[2]],
                         y=[x1y1x2y2[1],x1y1x2y2[3]],
                         mode = 'lines+markers',
                         name = f'line ({x1y1x2y2[0]},{x1y1x2y2[1]}) \
to ({x1y1x2y2[2]},{x1y1x2y2[3]})')
        data = [trace1, sc1, sc2]
    fig = go.Figure( data=data, layout=layout )
    # update figure
    fig.update_layout(
        xaxis = dict(tickmode = 'linear',dtick = 5),
        yaxis = dict(tickmode = 'linear',dtick = 5),
    )
    iplot(fig)
    return None
# Draw the contour plot of g_sq vs gmd_sq bin densities.
N = 100
transform='log'
scale='zscale'
xcol,ycol = 'g_sq', 'gmd_sq'
z = matrix_of_number_density_from_two_cols(df,xcol,ycol,N)
z1 = transform_scale(z,transform=transform,scale=scale)
plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
             title=f'Contour plot: {transform}+{scale}',
             xlabel=f'Bin number of {xcol}',
             ylabel=f'Bin number of {ycol}')
# Recompute the shared binning by hand to inspect individual bins.
N = 100
xcol = 'g_sq'
ycol = 'gmd_sq'
max_abs_xcol_ycol = df[[xcol,ycol]].abs().max().max()
print(max_abs_xcol_ycol)
df[df[xcol]==max_abs_xcol_ycol]
# create 1d array with N+1 values to create N bins
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
# bins dict: bin number -> left edge value
bins_dict = {i:v for i,v in enumerate(bins)}
# create N bins
ser_ycol_bins = pd.cut(df[ycol], bins=bins,)
# NOTE(review): the column is named g_sq_bins but the cut above is on
# ycol (gmd_sq) -- confirm which column was intended.
df['g_sq_bins'] = ser_ycol_bins
df[['g_sq','g_sq_bins']].head()
# bin 0 ==> 0 to 0.03994369
# bin 1 ==> 0.03994369 to 0.07988739
# bin 10 ==> 0.39943694 to 0.43938063
bins[10], bins[11]
What is the gmd_sq value corresponding to y-axis bin number 10 (the 11th bin)?
The 100*100 grid is created from the absolute max of g_sq and gmd_sq. The line from 0 to that absolute max is divided into 100 parts to create the bins.
Bin 10 for gmd_sq is 0.39943694 to 0.43938063.
# take all points where gmd_sq > 10th bin
df_gmd_sq_lt_bin10 = df[df.gmd_sq > bins[10]]
print(df_gmd_sq_lt_bin10.shape)
df_gmd_sq_lt_bin10.head()
# fN[0][0] ==> lsst_mono_z1.5_000.fits
# fN[1][0] ==> lsst_mono90_z1.5_000.fits
#
# id[0][0] ==> id of given object for mono file number 0
# take only first file number 0 (it has m,m9,c and c9)
# .copy() so the new column below is written to a real frame, not a view of
# df_gmd_sq_lt_bin10 (avoids SettingWithCopyWarning and silently-lost
# writes under pandas copy-on-write)
df_gmd_sq_lt_bin10_file0 = df_gmd_sq_lt_bin10[df_gmd_sq_lt_bin10['fN[0][0]'] == 0].copy()
# add radius for mono: mean of the two mono radii
df_gmd_sq_lt_bin10_file0['radius_mono'] = \
    (df_gmd_sq_lt_bin10_file0['radius[0][0]'] +
     df_gmd_sq_lt_bin10_file0['radius[1][0]'] ) /2.0
print(df_gmd_sq_lt_bin10_file0.shape)
df_gmd_sq_lt_bin10_file0.head()
For example, take points:
Lower line below the diagonal line
point on x-axis: x1,y1 = 10,0
point on y-axis: x2,y2 = 99,90
here 10,0,99,90 are bin numbers; their actual values are obtained from bins_dict
x1,y1 = bins_dict[10], bins_dict[0]
x2,y2 = bins_dict[99], bins_dict[90]
Equation of the straight line:

$$ y - y_1 = \frac{y_2 - y_1}{x_2 - x_1}\,(x - x_1) $$

boundary test: $(x_2 - x_1)(y - y_1) - (y_2 - y_1)(x - x_1) \ge 0$
# Label points above/below the boundary line drawn on the contour plot.
N = 100
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
bins_dict = {i:v for i,v in enumerate(bins)}
# points from contour plot
x1y1x2y2 = [10,0,99,90]
print(x1y1x2y2)
# actual values of x1, y1, x2, and y2
x1,y1 = bins_dict[x1y1x2y2[0]], bins_dict[x1y1x2y2[1]]
x2,y2 = bins_dict[x1y1x2y2[2]], bins_dict[x1y1x2y2[3]]
# boundary test (x2-x1)*(y-y1) >= (y2-y1)*(x-x1) with y = gmd_sq, x = g_sq;
# y1 == bins_dict[0] == 0 here, hence gmd_sq appears without an offset.
# @-prefixed names pull the python locals into the eval expression.
df['above'] = df.eval(" ( (@x2-@x1) * gmd_sq ) >= ( (@y2-@y1) * (g_sq - @x1 )) ")
print(df['above'].value_counts())
print()
print(df['above'].value_counts(normalize=True))
# Histogram of gm_sq for the points above the boundary, annotated with counts.
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='b',label='Above')
nabove = len(df[df.above==True])
nbelow = len(df[df.above==False])
above_pct = nabove/(nabove+nbelow)*100
below_pct = 100-above_pct
text = f'Above = {nabove:,} ({above_pct:,.0f}%)\nBelow = {nbelow:,} ( {below_pct:,.0f}%)'
plt.text(0.5,10_000,text,fontsize=14)
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms for above and below cases', fontsize=20)
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5)
plt.xlabel('gm_sq')
plt.title('gm_sq histogram', fontsize=20)
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
# Keep only the points above the boundary (this permanently shrinks df).
rows_before = df.shape[0]
df = df[df.above==True]
print(f'Before rows: {rows_before}, After rows: {df.shape[0]}')
plt.figure(figsize=(12,8))
sns.distplot(df['gm_sq'],label='gm_sq')
plt.legend()
plt.savefig('images/gmsq_histogram_after_cleaning.png',dpi=300)
Equation of the straight line:

$$ y - y_1 = \frac{y_2 - y_1}{x_2 - x_1}\,(x - x_1) $$

boundary test: $(x_2 - x_1)(y - y_1) - (y_2 - y_1)(x - x_1) \ge 0$
# Scatter of mono vs chro shear components with the 45-degree identity line.
fig,ax = plt.subplots(1,2,figsize=(16,8))
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])
ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)
ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)
# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);
# Same two scatter plots, now with +/- n-sigma parallel bands added.
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])
ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)
ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)
# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')
# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()
# Find distance for parallel lines
# band offset: n*sigma of the residual, projected onto the 45-degree line
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')
# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')
#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')
plt.show()
# Remove points farther than the n-sigma band from the identity line
# (both components, applied sequentially -- df shrinks in place).
# Remove outliers for gc0 vs gm0
n = 10
sigma = (df['gm[0]'] - df['gc[0]']).std()
d = n * sigma
c = d/np.sqrt(2)
cond_upper = (df['gc[0]'] - df['gm[0]'] <= c)
cond_below = (df['gm[0]'] - df['gc[0]'] <= c)
df = df[ cond_upper & cond_below ]
# df.plot.scatter(x='gm[0]',y='gc[0]')
# Remove outliers for gc1 vs gm1
n = 10
sigma = (df['gm[1]'] - df['gc[1]']).std()
d = n * sigma
c = d/np.sqrt(2)
cond_upper = (df['gc[1]'] - df['gm[1]'] <= c)
cond_below = (df['gm[1]'] - df['gc[1]'] <= c)
df = df[ cond_upper & cond_below ]
# df.plot.scatter(x='gm[1]',y='gc[1]')
# Re-draw the scatter plots after outlier removal.
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])
ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)
ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)
# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')
# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()
# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')
# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2)
y1 = x + c
y2 = x - c
ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')
#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')
plt.show()
# Quick inspection of the magnitude and squared-shear columns.
df.filter(regex='mag').head(2)
df[['gm_sq','gc_sq']].describe()
def plot_bin_mag_mono_chro(nbins,show=False,ret=False):
    """Plot the mean gm_sq / gc_sq per magnitude bin for mono and chro.

    Reads AND mutates the notebook-global df: adds the columns
    'bins_mag_mono' and 'bins_mag_chro'. Saves the figure to
    images/bin_mag_mono_chro_{nbins}.png.

    Parameters: nbins (number of magnitude bins), show (call plt.show),
    ret (return the two per-bin mean Series).
    Returns: (df_mono_gm_sq_mean, df_chro_gm_sq_mean) if ret else None.
    """
    import os
    if not os.path.isdir('images'):
        os.makedirs('images')
    df['bins_mag_mono'] = pd.cut(df['mag_mono'],nbins)
    df['bins_mag_chro'] = pd.cut(df['mag_chro'],nbins)
    # show the text of bin counts
    text_mono = df.groupby('bins_mag_mono')['gm_sq'].agg(['count', 'mean'])
    text_mono = text_mono.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
    text_mono = text_mono.to_string().replace('mean','gm_sq')
    text_chro = df.groupby('bins_mag_chro')['gc_sq'].agg(['count', 'mean'])
    text_chro = text_chro.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
    text_chro = text_chro.to_string().replace('mean','gc_sq')
    # data to plot
    df_mono_gm_sq_mean = df.groupby('bins_mag_mono')['gm_sq'].mean()
    df_chro_gm_sq_mean = df.groupby('bins_mag_chro')['gc_sq'].mean()
    # plot
    fig,ax = plt.subplots(1,2,figsize=(12,8))
    # mono (plot the magnitude mean in each bins)
    df_mono_gm_sq_mean.plot(marker='o',ax=ax[0])
    ax[0].tick_params(axis='x', rotation=90)
    ax[0].set_ylabel('gm_sq_mean',fontsize=18)
    ax[0].set_xlabel('bin_mag_mono',fontsize=18)
    ax[0].set_title(f'gm_sq per magnitude bins with nbins = {nbins}')
    ax[0].text(0,0.5,text_mono,fontsize=14,va='center')
    ax[0].set_ylim(0,1)
    ax[0].set_yticks(np.arange(0, 1, step=0.1))
    # chro
    df_chro_gm_sq_mean.plot(marker='o',ax=ax[1])
    ax[1].tick_params(axis='x', rotation=90)
    ax[1].set_ylabel('gc_sq_mean',fontsize=18)
    ax[1].set_xlabel('bin_mag_chro',fontsize=18)
    ax[1].set_title(f'gc_sq per magnitude bins with nbins = {nbins}')
    ax[1].text(0,0.5,text_chro,fontsize=14,va='center')
    ax[1].set_ylim(0,1)
    ax[1].set_yticks(np.arange(0, 1, step=0.1))
    plt.tight_layout()
    plt.savefig(f'images/bin_mag_mono_chro_{nbins}.png')
    if show:
        plt.show()
    plt.close()
    if ret:
        return df_mono_gm_sq_mean, df_chro_gm_sq_mean
# Generate the binned-magnitude plots for a range of bin counts.
for nbins in range(5,15):
    plot_bin_mag_mono_chro(nbins,show=True)
"""
Note:
These plots were much different previously without doing some filterings:
https://nbviewer.jupyter.org/github/bpRsh/2019_shear_analysis_after_dmstack/blob/master/Jan_2020/a01_jan8/a01_cleancat15_gc0_gm0.ipynb
""";
df['mag_mono'].plot.hist(bins=100)
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.
num_start_increasing = 3
df_mono.index[num_start_increasing]
# how to choose the bottom of the slope mathematically?
# try to see pct change
df_mono.pct_change().to_frame('gm_sq_pct_change').style.background_gradient(low=0,high=0.9)
# look at smaller part than
df_mono_left = df_mono.pct_change()\
.loc[df_mono.pct_change().index < df_mono.pct_change().idxmax()]
df_mono_left
# df_mono_left.loc[lambda x: x<0.2] # take pct change less than 0.2
# df_mono_left.loc[lambda x: x<0.2].shape # this gives the number 3
# # but this is not full-proof method, simply look at the plot and choose the
# # bottom of slope visually.
# df_mono.to_frame().style.background_gradient()
# df_mono.index[num_start_increasing].left, df_mono.index[num_start_increasing].right
# df_mono.idxmax() # index for peak of curve
df_mono.idxmax().left, df_mono.idxmax().right
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_mono = [df_mono.index[num_start_increasing].left,
df_mono.index[num_start_increasing].right
]
mag_low_nbinsK_mono = np.mean(idx_bottom_of_slope_mono)
# index for peak of curve
idx_peak_mono = [df_mono.idxmax().left,
df_mono.idxmax().right
]
mag_high_nbinsK_mono = np.mean(idx_peak_mono)
idx_bottom_of_slope_mono, idx_peak_mono, mag_low_nbinsK_mono, mag_high_nbinsK_mono
# Linear fit of gm_sq vs magnitude over the rising part of the curve.
from scipy.optimize import curve_fit
xcol = 'mag_mono'
ycol = 'gm_sq'
x = df.query(""" @mag_low_nbinsK_mono < mag_mono < @mag_high_nbinsK_mono """)[xcol].to_numpy()
y = df.query(""" @mag_low_nbinsK_mono < mag_mono < @mag_high_nbinsK_mono """)[ycol].to_numpy()
def func(x, a, b):
    return a*x + b
params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)
print(f'magnitude ranges for mono: {mag_low_nbinsK_mono}, {mag_high_nbinsK_mono}')
print(f'fitting params for mono: {a}, {b}' )
mag_low_nbinsK_mono
def magnitude_weight_mono(mag):
    # weight = 1/(linear fit); constant weight below the fitted range.
    # NOTE(review): reads the module-level a, b at call time -- must be
    # applied immediately after the mono fit (before a,b are refit for chro).
    if mag < mag_low_nbinsK_mono:
        return 1/ (mag_low_nbinsK_mono *a + b)
    else:
        return 1/ (a*mag + b)
df['wt_mag_mono'] = df['mag_mono'].apply(magnitude_weight_mono)
df['wt_mag_mono'] = df['wt_mag_mono'] / df['wt_mag_mono'].mean() # normalize by mean
# Same procedure for the chromatic catalog: fit gc_sq vs magnitude, weight.
df['mag_chro'].plot.hist(bins=100)
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.
num_start_increasing = 3
df_chro.index[num_start_increasing]
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_chro = [df_chro.index[num_start_increasing].left,
df_chro.index[num_start_increasing].right
]
mag_low_nbinsK_chro = np.mean(idx_bottom_of_slope_chro)
# index for peak of curve
idx_peak_chro = [df_chro.idxmax().left,
df_chro.idxmax().right
]
mag_high_nbinsK_chro = np.mean(idx_peak_chro)
idx_bottom_of_slope_chro, idx_peak_chro, mag_low_nbinsK_chro, mag_high_nbinsK_chro
from scipy.optimize import curve_fit
xcol = 'mag_chro'
ycol = 'gc_sq'
x = df.query(""" @mag_low_nbinsK_chro < mag_chro < @mag_high_nbinsK_chro """)[xcol].to_numpy()
y = df.query(""" @mag_low_nbinsK_chro < mag_chro < @mag_high_nbinsK_chro """)[ycol].to_numpy()
def func(x, a, b):
    return a*x + b
params, _ = curve_fit(func, x, y)
# NOTE(review): this rebinds the module-level a, b used by
# magnitude_weight_mono above -- the mono weights must already be applied.
[a, b] = params.round(2)
print(f'magnitude ranges for chro: {mag_low_nbinsK_chro}, {mag_high_nbinsK_chro}')
print(f'fitting params for chro: {a}, {b}' )
mag_low_nbinsK_chro
def magnitude_weight_chro(mag):
    # weight = 1/(linear fit); constant weight below the fitted range
    if mag < mag_low_nbinsK_chro:
        return 1/ (mag_low_nbinsK_chro*a + b)
    else:
        return 1/ (a*mag + b)
df['wt_mag_chro'] = df['mag_chro'].apply(magnitude_weight_chro)
df['wt_mag_chro'] = df['wt_mag_chro'] / df['wt_mag_chro'].mean() # normalize by mean
# mean
df['wt_mag'] = (df['wt_mag_mono'] + df['wt_mag_chro']) / 2
# df.drop(['wt_mag_chro','wt_mag_mono'],axis=1,inplace=True)
df.iloc[:2,-7:]
c2 = (dx * dx - dy * dy) / (r * r);
s2 = 2 * dx * dy / (r * r);
eX = s2 * e[0] + c2 * e[1];
eesum += eX * eX * w[0] * w[0];
eTsum[bin] -= (c2 * e[0] + s2 * e[1]) * w[0];
df.head(2)
# constants
RMIN = 10
DLNR = 0.5
df['dx'] = df['x[0]'] - 1699 # jesisim output fitsfiles have shape 3398, 3398
df['dy'] = df['x[1]'] - 1699
df['r'] = np.hypot(df['dx'], df['dy'])
# df['r'] = np.sqrt(df['dx']**2 + df['dy']**2)
# angle factors of the position vector: cos(2*theta) and sin(2*theta)
df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
df['sin2theta'] = df.eval(' (2 * dx * dy ) / (r * r)' )
# logarithmic radial bin index: floor( ln(r/RMIN) / DLNR )
df['bin'] = ( np.log(df.r / RMIN) / DLNR).astype(int)
df['bin'].value_counts()
# cross (eX) and tangential (eT) shear components, following the imcat
# recipe quoted above (eX = s2*e0 + c2*e1, eT = -(c2*e0 + s2*e1))
df['eX_mono'] = df['sin2theta'] * df['gm[0]'] + df['cos2theta'] * df['gm[1]']
df['eT_mono'] = -1 * (df['cos2theta'] * df['gm[0]'] + df['sin2theta'] * df['gm[1]'] )
df['eX_chro'] = df['sin2theta'] * df['gc[0]'] + df['cos2theta'] * df['gc[1]']
df['eT_chro'] = -1 * (df['cos2theta'] * df['gc[0]'] + df['sin2theta'] * df['gc[1]'] )
# magnitude-weighted quantities used by the radial-bin aggregations below
df['eT_mono_times_wt'] = df['eT_mono'] * df['wt_mag']
df['eT_chro_times_wt'] = df['eT_chro'] * df['wt_mag']
df['eX_monosq_times_wt'] = df['eX_mono'] * df['eX_mono'] * df['wt_mag']
df['eX_chrosq_times_wt'] = df['eX_chro'] * df['eX_chro'] * df['wt_mag']
df['eXsq_times_wt'] = df.eval(" (eX_monosq_times_wt + eX_chrosq_times_wt) / 2 ")
df.iloc[:2,-6:]
https://www.phenix.bnl.gov/WWW/publish/elke/EIC/Files-for-Wiki/lara.02-008.errors.pdf
When the statistics involved in calculating $E_A$ and $E_B$ are not independent, the error for a function $f(E_A, E_B)$ has the expression: $$ \sigma_{f}=\sqrt{\left(\frac{\partial f}{\partial E_{A}} \sigma_{A}\right)^{2}+\left(\frac{\partial f}{\partial E_{B}} \sigma_{B}\right)^{2}+2 \frac{\partial f}{\partial E_{A}} \frac{\partial f}{\partial E_{B}} \operatorname{cov}\left(E_{A}, E_{B}\right)} $$
where the last term takes care of the correlations between $E_A$ and $E_B$. Given a large number $N$ of measurements $E_{A_i}$, the standard deviation $\sigma_A$ is empirically defined as:
$$ \sigma_{A}^{2}=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)^{2} $$while the covariance between $E_A$ and $E_B$ is given by: $$ \operatorname{cov}\left(E_{A}, E_{B}\right)=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)\left(E_{B_{i}}-E_{B}\right) $$
where $E_A$ and $E_B$ are the averages of $E_{A_i}$ and $E_{B_i}$. When $E_A$ and $E_B$ are independent, over a large number $N$ of measurements they will fluctuate around their average in an uncorrelated way, so that the covariance is zero and one recovers the usual formula for the propagation of errors in a function of independent variables.
"""
f = m/c
df/dm = 1/c
df/dc = -m/c2
s2 ==> sigma
s2 = r2 (sm2/m2 + sc2/c2 -2/m/c cov(m,c))
put m=c,
s2 = 2/m2 (sm2 - cov)
error = sigma/sqrt(n)
error =
0.000332
sqrt --------
eT(bin)**2
---------------------
sqrt(ngals_bin)
""";
cov = df[['eT_chro','eT_mono']].cov()
cov
diag_mean = np.diag(cov).mean()
diag_mean
offdiag = cov.iloc[0,1]
offdiag
diag_mean - offdiag
myerr = (diag_mean - offdiag) * 2
myerr
# temporary value to calculate error
# err_coeff = 0.000332
err_coeff = myerr
df['err_numerator'] = myerr # np.sqrt(err_coeff/df['eT_mono']/df['eT_mono'])
df['eX_monosq_times_wt_std'] = df['eX_monosq_times_wt'].std()
df['eX_chrosq_times_wt_std'] = df['eX_chrosq_times_wt'].std()
df['eT_ratio'] = df['eT_mono'] / df['eT_chro']
df['eT_ratio_std'] = df['eT_ratio'].std()
df.iloc[:2,-5:]
# constants
RMIN = 10
DLNR = 0.5
# renaming aggegration needs pandas > 0.25
pd.__version__
df.filter(regex='times|err_numerator').head().round(1)
df_radial_bins = df.groupby('bin').agg(
r_mean = ('r', 'mean'),
wt_mag_sum = ('wt_mag', 'sum'),
eT_mono_times_wt_sum = ('eT_mono_times_wt', 'sum'),
eT_chro_times_wt_sum = ('eT_chro_times_wt', 'sum'),
eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
err_numerator_mean = ('err_numerator', 'mean')
)
# bin counts
df_radial_bins['bin_count'] = df['bin'].value_counts()
# columns after binning
df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eX_mean_mono'] = np.sqrt(df_radial_bins['eX_monosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eX_mean_chro'] = np.sqrt(df_radial_bins['eX_chrosq_times_wt_sum'] /
df_radial_bins['wt_mag_sum'])
df_radial_bins['eT_ratio'] = (df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro'])
# Errors
sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
df_radial_bins['eT_mean_mono'] /
df_radial_bins['eT_mean_chro']) /
sqrt_bin)
print('Statistics for different radial bins')
print(f'RMIN = {RMIN} and DLNR = {DLNR}')
df_radial_bins.style\
.background_gradient(subset=['eT_ratio_err'],cmap='Blues')
# why some eT values are -ve?
"""
1. For given rmin and dlnr we have some bins very few object counts.
""";
pd.cut(df['eT_mono'],20).value_counts().to_frame().style.background_gradient()
fig,ax = plt.subplots(figsize=(12,8))
sns.distplot(df['eT_mono'],ax=ax)
plt.title('Histogram and density plot of eT_mono');
df['wt_mag'].hist(bins=100,figsize=(12,8));
plt.title('Histogram of wt_mag')
plt.xlabel('wt_mag')
plt.ylabel('Frequency');
plt.savefig('images/weights.png',dpi=300)
df.query(""" r>500 """)['eT_mono'].describe().to_frame()
df['gm_sq'].hist(bins=100,figsize=(12,8))
plt.title('Histogram of gm_sq')
plt.ylabel('Frequency')
plt.xlabel('gm_sq')
plt.savefig('images/gm_sq_hist_after_cleaning.png',dpi=300)
df.head(2)
def plot_eT_mean(rmin, dlnr,min_bin_count,
                 show_ratio=True,
                 show_mono_chro=False,
                 show_data=True):
    """Bin the notebook-global df radially and plot mean tangential shear.

    Mutates the global df (rewrites the 'bin' column) and saves the figure
    to images/eT_versus_radius.png.

    Parameters: rmin / dlnr (log radial binning: bin = ln(r/rmin)/dlnr),
    min_bin_count (drop bins with fewer objects), show_ratio (plot
    eT_mono/eT_chro with error bars), show_mono_chro (3-panel eT plots),
    show_data (display the styled per-bin table).
    """
    # title
    title = f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
    # radial bin
    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)
    # aggregations per given bin
    df_radial_bins = df.groupby('bin').agg(
        r_mean = ('r', 'mean'),
        wt_mag_sum = ('wt_mag', 'sum'),
        eT_mono_times_wt_sum = ('eT_mono_times_wt', 'sum'),
        eT_chro_times_wt_sum = ('eT_chro_times_wt', 'sum'),
        eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
        eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
        err_numerator_mean = ('err_numerator', 'mean')
    )
    # bin counts
    df_radial_bins['bin_count'] = df['bin'].value_counts()
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """)
    # drop the last (outermost, usually partial) bin
    df_radial_bins = df_radial_bins.iloc[:-1,:]
    # columns after binning: weighted means and their ratio
    df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])
    df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])
    df_radial_bins['eX_mean_mono'] = np.sqrt(
        df_radial_bins['eX_monosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )
    df_radial_bins['eX_mean_chro'] = np.sqrt(
        df_radial_bins['eX_chrosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )
    df_radial_bins['eT_ratio'] = (df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro'])
    # Errors: standard error of the mean per bin
    sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
    df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
    df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
    df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                               df_radial_bins['eT_mean_mono'] /
                                               df_radial_bins['eT_mean_chro']) /
                                       sqrt_bin)
    # also plot eT mono and eT chro
    if show_ratio:
        fig, ax = plt.subplots(1,1,figsize=(12,8))
        df_radial_bins.plot.line(x='r_mean',y='eT_ratio',
                                 yerr='eT_ratio_err',
                                 marker='o',color='b',ax=ax)
        ax.set_xlabel('Radius',fontsize=18)
        ax.set_ylabel(r'$\frac{eT_{mono}}{eT_{chro}}$',fontsize=18)
        plt.ylim(0.95, 1.0)
    if show_mono_chro:
        fig, ax = plt.subplots(3,1,figsize=(14,12))
        # top: mono and chro overlaid
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
                                 yerr='eT_mono_err',
                                 marker='o',color='r', ax=ax[0])
        # top
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
                                 yerr='eT_chro_err',
                                 marker='o',color='b',ax=ax[0])
        # middle
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
                                 yerr='eT_mono_err',
                                 marker='o',color='r', ax=ax[1])
        # bottom
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
                                 yerr='eT_chro_err',
                                 marker='o',color='b',ax=ax[2])
        # label
        ax[0].set_xlabel('')
        ax[1].set_xlabel('')
        ax[2].set_xlabel('Radius',fontsize=18)
        ax[0].set_ylabel('eT',fontsize=18)
        ax[1].set_ylabel('eT_mean_mono',fontsize=18)
        ax[2].set_ylabel('eT_mean_chro',fontsize=18)
        ax[0].set_xlim(0,1800)
        ax[1].set_xlim(0,1800)
        ax[2].set_xlim(0,1800)
        plt.suptitle(f'Plot of eT for {title}',fontsize=24,weight='bold')
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
    plt.xlim(0,1800)
    plt.savefig('images/eT_versus_radius.png',dpi=300)
    plt.tight_layout()
    plt.show()
# plot now
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
show_ratio=True,show_mono_chro=False)
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
show_ratio=False,show_mono_chro=True)
# Plotly setup for the interactive version of the eT plots.
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import plot, iplot, init_notebook_mode
# connected=False embeds plotly.js in the notebook instead of loading it
# from a CDN, so the notebook works offline.
init_notebook_mode(connected=False)
# Quick sanity peek at the weight columns of the source catalog.
df.filter(regex='wt').head(2)
# I need pandas > 0.25 for multiple agg
pd.__version__
def plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
                   show_mono_chro=False,
                   show_mono=False,
                   show_chro=False,
                   show_data=True,
                   ):
    """Interactive plotly version of the radial eT profile plot.

    Bins the module-level catalog ``df`` into logarithmic radial bins,
    computes weighted-mean tangential ellipticities (eT) for the
    monochromatic and chromatic measurements, and renders the selected
    curves with plotly.

    NOTE(review): reads AND mutates the module-level DataFrame ``df``
    (overwrites its 'bin' column) -- confirm no other cell depends on a
    previous 'bin' assignment.

    Parameters
    ----------
    rmin : reference radius of the logarithmic binning.
    dlnr : logarithmic bin width.
    min_bin_count : bins with fewer sources than this are dropped.
    show_mono_chro : plot eT_mono and eT_chro together.
    show_mono : plot only eT_mono (overrides show_mono_chro).
    show_chro : plot only eT_chro (overrides both flags above).
    show_data : also display the binned dataframe with a gradient style.

    Returns whatever ``iplot`` returns (renders inline in the notebook).
    """
    # title
    title=f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
    # radial bin: integer index of log-spaced annuli relative to rmin.
    # int() truncates toward zero, so r slightly below and slightly above
    # rmin both land in bin 0 -- presumably intended; TODO confirm.
    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)
    # aggregations per given bin (named aggregation requires pandas >= 0.25)
    df_radial_bins = df.groupby('bin').agg(
        r_mean = ('r', 'mean'),
        wt_mag_sum = ('wt_mag', 'sum'),
        eT_mono_times_wt_sum = ('eT_mono_times_wt', 'sum'),
        eT_chro_times_wt_sum = ('eT_chro_times_wt', 'sum'),
        eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
        eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
        err_numerator_mean = ('err_numerator', 'mean')
    )
    # bin counts: value_counts() is indexed by bin id, so this aligns
    # with the groupby index automatically.
    df_radial_bins['bin_count'] = df['bin'].value_counts()
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """)
    # drop the outermost bin (presumably only partially covered -- confirm)
    df_radial_bins = df_radial_bins.iloc[:-1,:]
    # columns after binning: weighted means of eT, per catalog.
    df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])
    df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])
    # RMS of the cross component eX, used below as the per-bin scatter.
    df_radial_bins['eX_mean_mono'] = np.sqrt(
        df_radial_bins['eX_monosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )
    df_radial_bins['eX_mean_chro'] = np.sqrt(
        df_radial_bins['eX_chrosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )
    df_radial_bins['eT_ratio'] = (df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro'])
    # Errors: standard error of the mean, scatter / sqrt(N).
    sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
    df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
    df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
    df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                               df_radial_bins['eT_mean_mono'] /
                                               df_radial_bins['eT_mean_chro']) /
                                       sqrt_bin)
    # remove first bin of strong lensing
    df_radial_bins = df_radial_bins.iloc[1:,:]
    # eT mono vs chro ratio trace, with symmetric data error bars.
    sc = go.Scatter( x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_ratio'],
                     mode = 'lines+markers',
                     name = 'eT ratio',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_ratio_err'],
                         visible=True,
                     )
                   )
    mydata = [sc]
    # layout shared by all figure variants below
    layout = go.Layout(
        title={
            'text': title,
            'y':0.9,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top'},
        xaxis=dict(
            title='radius',
            titlefont=dict(
                family='Courier New, monospace',
                size=18,
                color='#7f7f7f')),
        yaxis=dict(title='eT',
                   titlefont=dict(
                       family='Courier New, monospace',
                       size=18,
                       color='#7f7f7f')))
    # default figure: the ratio curve
    myfig = go.Figure(data=mydata, layout=layout)
    # also plot eT mono and chro
    # monochromatic trace
    sc1 = go.Scatter(x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_mean_mono'],
                     mode = 'lines+markers',
                     name = 'eT mono',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_mono_err'],
                         visible=True,
                     )
                    )
    # chromatic trace
    sc2 = go.Scatter(x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_mean_chro'],
                     mode = 'lines+markers',
                     name = 'eT chro',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_chro_err'],
                         visible=True,
                     )
                    )
    # NOTE: the flags are checked in sequence and each rebuilds myfig,
    # so when several are True the LAST matching branch wins
    # (show_chro > show_mono > show_mono_chro > ratio).
    if show_mono_chro:
        mydata= [sc1,sc2]
        myfig = go.Figure(data=mydata, layout=layout)
    if show_mono:
        mydata = [sc1]
        myfig = go.Figure(data=mydata, layout=layout)
        myfig.update_layout(
            xaxis_title="Radius",
            yaxis_title="eT_mono",
        )
        myfig.update_layout(
            title={
                'text': "eT_mono vs radius plot",
                'y':0.9,
                'x':0.5,
                'xanchor': 'center',
                'yanchor': 'top'}
        )
    if show_chro:
        mydata = [sc2]
        myfig = go.Figure(data=mydata, layout=layout)
        myfig.update_layout(
            xaxis_title="Radius",
            yaxis_title="eT_chro",
        )
        myfig.update_layout(
            title={
                'text': "eT_chro vs radius plot",
                'y':0.9,
                'x':0.5,
                'xanchor': 'center',
                'yanchor': 'top'})
    # this is for all cases
    myfig.update_layout(showlegend=True)
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
    # iplot plots in jupyter-notebook, plot opens in new tab.
    return iplot(myfig, filename='myplot.html')
# plot
# Ratio curve (default view), plus the binned dataframe.
plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
               show_mono_chro=False,
               show_mono=False,
               show_chro=False,
               show_data=True,
               )
# Both curves together; coarser binning (dlnr=0.5, rmin=300).
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_mono_chro=True)
# Individual mono / chro curves, without the dataframe display.
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_data=False,show_mono=True)
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_data=False,show_chro=True)
# NOTE(review): the next three commands are Jupyter shell escapes that
# lost their leading '!' when the notebook was exported. As bare lines
# they are a SyntaxError in a Python script, so they are preserved as
# comments; run them in a terminal (or re-prefix with '!' in a notebook).
#   which pip
#   pip install ipywidgets
#   jupyter nbextension enable --py --sys-prefix widgetsnbextension
# Interactive sliders driving plotly_eT_plot.
import ipywidgets as widgets
from ipywidgets import HBox, VBox
from ipywidgets import interactive
# Sliders for the three binning parameters; initial values match the
# manual calls above.
rmin_ = widgets.IntSlider(min=0, max=500, step=50, value=300)
dlnr_ = widgets.FloatSlider(min=0.1,max=0.8,step=0.05,value=0.5)
min_bin_count_ = widgets.IntSlider(min=10, max=600, step=20, value=20)
# Wire sliders to the plotting function; the remaining keyword
# arguments of plotly_eT_plot keep their defaults.
interactive_plot = interactive(plotly_eT_plot,
                               rmin=rmin_,
                               dlnr=dlnr_,
                               min_bin_count=min_bin_count_)
output = interactive_plot.children[-1]  # the Output widget holding the plot
interactive_plot  # displayed when this is the last expression in a cell
# Report total notebook wall-clock runtime.
# time_start_notebook is set at the top of the notebook -- TODO confirm.
time_taken = time.time() - time_start_notebook
h, rem = divmod(time_taken, 60 * 60)  # whole hours, leftover seconds
m, s = divmod(rem, 60)                # whole minutes, leftover seconds
# f-string for consistency with the rest of the file; the unpacked
# `*divmod(m, 60)` in the old .format call was easy to misread.
print(f'Time taken to run whole notebook: {h:.0f} hr {m:.0f} min {s:.0f} secs')
import glob
import subprocess
import sys

# Convert every notebook in the current directory.
# With the list form of subprocess (shell=False) no shell expands the
# '*.ipynb' wildcard, so the pattern must be expanded explicitly here.
# sys.executable guarantees the same interpreter that runs this script.
_notebooks = glob.glob('*.ipynb')
if _notebooks:
    subprocess.call([sys.executable, '-m', 'nbconvert', *_notebooks])